## INTRODUCTION

To analyze the dataset, the following packages are required: ggplot2, dplyr and magrittr (cowplot is also called, via `cowplot::`, for the comparison grids at the end). We then import the dataset, which covers the Covid-19 pandemic from 31/12/2019 to 14/12/2020 and has 12 variables: date, day, month, year, cases, deaths, country, code (of the country), population, continent, cases_cum (cumulative cases) and deaths_cum (cumulative deaths).

# Show the source of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)

# Packages attached for plotting, data manipulation and the `%>%` pipe.
library(ggplot2)
library(dplyr)
library(magrittr)

# Read the semicolon-separated file into `df`.
# NOTE(review): the path is absolute and machine-specific; it must exist on
# the machine that renders this document.
df <- read.delim(file = "/Users/martina/Desktop/covid.csv", sep = ";")

DATA ANALYSIS

This analysis focuses on the registered deaths and cases of Covid-19 in five continents: Europe, Asia, Africa, Oceania and America. Because the data form a time series, the “date” variable must be converted to a proper date format so that the observations are ordered and plotted chronologically.

# Parse the day/month/year strings once, then sort the rows chronologically.
# `df` itself is left untouched; only the sorted copy carries Date values.
ord_df <- df
ord_df$date <- as.Date(ord_df$date, format = "%d/%m/%Y")
ord_df <- ord_df[order(ord_df$date), ]

# Display the dates as month/year labels (the result is printed, not stored).
format(ord_df$date, "%m/%Y")

DATA VISUALIZATION AND MANIPULATION

## Cases of Covid-19

The visualization process starts at continent-level and then focuses on the most relevant and affected countries.

# Asia: daily cases (y) against date (x), one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Asia"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily cases for China and India, stored in `c` for the comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
c <- ord_df %>%
  filter(country %in% c("China", "India")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
c

# Europe: daily cases over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Europe"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily cases for Italy, France and Spain, stored in `e` for the comparison
# grid.
# `%in%` keeps every row of the three countries. The previous `==` against a
# length-3 vector recycled that vector along the column and silently dropped
# every row that did not line up with the recycled value.
e <- ord_df %>%
  filter(country %in% c("Italy", "France", "Spain")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
e

# Africa: daily cases over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Africa"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily cases for South Africa and Morocco, stored in `a` for the comparison
# grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
a <- ord_df %>%
  filter(country %in% c("South_Africa", "Morocco")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
a

# America: daily cases over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "America"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily cases for the United States of America and Brazil, stored in `us`
# for the comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
us <- ord_df %>%
  filter(country %in% c("United_States_of_America", "Brazil")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
us

# Oceania: daily cases over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Oceania"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily cases for French Polynesia and Australia, stored in `o` for the
# comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
o <- ord_df %>%
  filter(country %in% c("French_Polynesia", "Australia")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
o

## Deaths

The following plots show the number of deaths at continent and country level. Note that one of the death observations for Spain is negative, which suggests an error in the dataset.

# Asia: daily deaths over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Asia"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily deaths for China and India, stored in `c1` for the comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
c1 <- ord_df %>%
  filter(country %in% c("China", "India")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
c1

# Europe: daily deaths over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Europe"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily deaths for Italy, France and Spain, stored in `e1` for the comparison
# grid.
# `%in%` keeps every row of the three countries. The previous `==` against a
# length-3 vector recycled that vector along the column and silently dropped
# every row that did not line up with the recycled value.
e1 <- ord_df %>%
  filter(country %in% c("Italy", "France", "Spain")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
e1

# Africa: daily deaths over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Africa"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily deaths for South Africa and Morocco, stored in `a1` for the
# comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
a1 <- ord_df %>%
  filter(country %in% c("South_Africa", "Morocco")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
a1

# America: daily deaths over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "America"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily deaths for the United States of America and Brazil, stored in `us1`
# for the comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
us1 <- ord_df %>%
  filter(country %in% c("United_States_of_America", "Brazil")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
us1

# Oceania: daily deaths over time, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Oceania"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")

# Daily deaths for French Polynesia and Australia, stored in `o1` for the
# comparison grid.
# `%in%` keeps every row of both countries. The previous `==` against a
# length-2 vector recycled that vector along the column, so only rows whose
# position happened to line up with the recycled value were kept.
o1 <- ord_df %>%
  filter(country %in% c("French_Polynesia", "Australia")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
o1

### COMPARISON DEATHS AND CASES FOR COUNTRIES

The following plots compare deaths and cases in the most affected countries.

# Each grid stacks a cases plot (top) above the matching deaths plot (bottom).

# China and India
cowplot::plot_grid(c, c1, nrow = 2, ncol = 1)

# French Polynesia and Australia
cowplot::plot_grid(o, o1, nrow = 2, ncol = 1)

# South Africa and Morocco
cowplot::plot_grid(a, a1, nrow = 2, ncol = 1)

# United States of America and Brazil
cowplot::plot_grid(us, us1, nrow = 2, ncol = 1)

# Italy, France and Spain
cowplot::plot_grid(e, e1, nrow = 2, ncol = 1)